**************************************************************************************
* .do-file to complement analysis with SCE data (gender heterogeneity in expectations)
**************************************************************************************

cd "$scerawdata"
clear all

***********************************************************************
***** import raw data for core module and convert to stata format *****
***********************************************************************

clear all
import excel using FRBNY-SCE-Public-Microdata-Complete-13-16.xlsx, firstrow cellrange(A2:HL56446) sheet("Data")
save "$scewkddata/sce_13_16.dta", replace

clear all
import excel using FRBNY-SCE-Public-Microdata-Complete-17-19.xlsx, firstrow cellrange(A2:HL47683) sheet("Data")
append using "$scewkddata/sce_13_16.dta"

save  "$scewkddata/sce_core.dta", replace
erase "$scewkddata/sce_13_16.dta"

*********************************************************************************
***** import raw data for HH finance supplement and convert to stata format *****
*********************************************************************************

clear all
import excel using SCE_public_HH-finance_quarterly_microdata.xlsx, firstrow sheet("Data")

save "$scewkddata/hh_fin_raw.dta", replace


*****************************************
***** merge both modules togehter *******
*****************************************

merge 1:m userid date using "$scewkddata/sce_core.dta"
drop _merge // HH finance supplement only in august, core module is monthly!

**************************************************************
************ variable adjustments + sample restrictions ******
**************************************************************

// recode date 
tostring date, replace
gen month = substr(date,5,2)
gen year = substr(date,1,4)
destring date month year, replace

// gender for all entries of the same individual (only asked to new respondents)
bysort userid: egen gender = max(Q33)
drop if gender == .   // 32 observations
replace gender = 0 if gender == 2   // 0: male, 1: female

// marital status (married or living with a partner, yes/no)
ren Q38 married						    // new respondents
replace married = DQ38 if married == .  // repeat respondents

// age as categorial varialbe
rename _AGE_CAT age_cat
gen age_group = 1 if age_cat == "Under 40"
replace age_group = 2 if age_cat != "Under 40" & age_cat != "" 

// sample restrictions (singles and below 60 years old)
keep if married == 2
keep if age_cat != "Over 60"

// keep waves 2014-2019
keep if year >= 2014 & year <=2019

**********************************************************
********* current and expected number of HH members ******
**********************************************************

/// note: these questions are asked in HH finance supplement, hence fewer observations than in core data ///

/// for variable description, see codebook in excel file of raw data ///

// current household members (2: children >25, 3: children 18-24, 4: children 6-17, 5: children < 6, 6: other relatives, 7: other non-relatives)
egen HHmemb_now = rowtotal(q45new_2 q45new_3 q45new_4 q45new_5 q45new_6 q45new_7 ), missing 

replace HHmemb_now = . if q45newdk_1 == .  // q45newdk_1 = 1:I live alone, 0: I do not live alone
replace HHmemb_now = 1 if q45newdk_1 == 1
replace HHmemb_now = HHmemb_now + 1 if q45newdk_1 == 0 & HHmemb_now > 0 
replace HHmemb_now = 1 if q45newdk_1 == 0 & HHmemb_now == 0 

// expected household members in 12 months from now (2: children >25, 3: children 18-24, 4: children 6-27, 5: children < 6, 6: other relatives, 7: other non-relatives)
egen HHmemb_exp = rowtotal(q45new3_2 q45new3_3 q45new3_4 q45new3_5 q45new3_6 q45new3_7), missing 
				  
replace HHmemb_exp = . if q45new3dk_1 == . // q45new3dk_1 = 1:I live expect to live alone, 0: I do not expect to live alone
replace HHmemb_exp = 1 if q45new3dk_1 == 1
replace HHmemb_exp = HHmemb_exp + 1 if q45new3dk_1 == 0 & HHmemb_exp > 0
replace HHmemb_exp = 1 if q45new3dk_1 == 0 & HHmemb_exp == 0 

// set data as survey data

gen  weight_freq = weight * 1000
replace weight_freq =trunc(weight_freq)
svyset userid [pweight = weight_freq] 

// drop households with more than 20 members
replace HHmemb_now = . if HHmemb_now > 20
replace HHmemb_exp = . if HHmemb_exp > 20
		
*****************************************************************
***** Histogram of expected number of HH Members (Figure 7) *****
*****************************************************************

// maximal number of household members: 8
tab HHmemb_now 
tab HHmemb_exp


// Figure 7a: full sample 
		gen ci_low  = . 
		gen ci_high = . 
		
		foreach num of numlist 1/8 {
		foreach x of numlist 0/1 {
		gen memb`num'= 1 if gender  == `x' &  HHmemb_exp == `num'
		replace memb`num' = 0 if HHmemb_exp  != `num' & HHmemb_exp  != . & gender  == `x'

		svy: mean memb`num' if gender  == `x' 
		mat A = r(table) 
		replace ci_low  = A[rownumb(A, "ll"),1 ] if gender  == `x' &  HHmemb_exp == `num' 
		replace ci_high = A[rownumb(A, "ul"),1 ] if gender  == `x' &  HHmemb_exp == `num'
		drop memb`num'
		}
		}
		

twoway (histogram HHmemb_exp if gender == 1 [fw=weight_freq], discrete width(1) color(white) lcolor(black%70) fraction)  ///
	   (histogram HHmemb_exp if gender == 0 [fw=weight_freq], discrete width(1) color(black%30) lcolor(none)   fraction) /// 
	   (rcap ci_high ci_low HHmemb_exp if gender == 0, color(gs9)) ///	 
	   (rcap ci_high ci_low HHmemb_exp if gender == 1, color(gs9)), ///	
	   legend(order( 2 "Men" 1 "Women") size(vlarge) region(lstyle(none))) graphregion(color(white)) ///
	   xtitle("Expected HH Size", siz(vlarge)) ytitle("Fraction", size(vlarge)) ///
	   xlabel(1(1)8,labsize(vlarge)) ylabel(,labsize(vlarge)) 
	   graph export "$resultpath/expected_hhsizes_all.eps", replace
	   graph export "$resultpath/expected_hhsizes_all.pdf", replace


// Figure 7b: households below age 40 
preserve
keep if age_cat == "Under 40"
drop ci*
		gen ci_low  = . 
		gen ci_high = . 
		
		foreach num of numlist 1/8 {
		foreach x of numlist 0/1 {
		gen memb`num'= 1 if gender  == `x' &  HHmemb_exp == `num'
		replace memb`num' = 0 if HHmemb_exp  != `num' & HHmemb_exp  != . & gender  == `x'

		svy: mean memb`num' if gender  == `x' 
		mat A = r(table) 
		replace ci_low  = A[rownumb(A, "ll"),1 ] if gender  == `x' &  HHmemb_exp == `num' 
		replace ci_high = A[rownumb(A, "ul"),1 ] if gender  == `x' &  HHmemb_exp == `num'
		drop memb`num'
		}
		}
		

twoway (histogram HHmemb_exp if gender == 1 [fw=weight_freq], discrete width(1) color(white) lcolor(black%70) fraction)  ///
	   (histogram HHmemb_exp if gender == 0 [fw=weight_freq], discrete width(1) color(black%30) lcolor(none)   fraction) /// 
	   (rcap ci_high ci_low HHmemb_exp if gender == 0, color(gs9)) ///	 
	   (rcap ci_high ci_low HHmemb_exp if gender == 1, color(gs9)), ///	
	   legend(order( 2 "Men" 1 "Women") size(vlarge) region(lstyle(none))) graphregion(color(white)) ///
	   xtitle("Expected HH Size", siz(vlarge)) ytitle("Fraction", size(vlarge)) ///
	   xlabel(1(1)8,labsize(vlarge)) ylabel(,labsize(vlarge)) 
	   graph export "$resultpath/expected_hhsizes_young.eps", replace
	   graph export "$resultpath/expected_hhsizes_young.pdf", replace
restore	


	   
***************************************************************************************
***** import raw data for Labor Market Module and merge to core + HH finance data *****
***************************************************************************************

clear all
import excel using sce-labor-microdata-public.xlsx, firstrow cellrange(A2:DN25015) sheet("Data")

merge 1:m userid date using "$scewkddata/sce_core.dta"
drop _merge // _merge == 1: years 2020+2021 (not used for core analysis), _merge == 2: labor market module not asked monthly

merge 1:m userid date using "$scewkddata/hh_fin_raw.dta"
drop _merge

**************************************************************
************ variable adjustments + sample restrictions ******
**************************************************************

// recode date 
tostring date, replace
gen month = substr(date,5,2)
gen year = substr(date,1,4)
destring date month year, replace

// gender for all entries of the same individual (only asked to new respondents)
bysort userid: egen gender = max(Q33)
drop if gender == .   // 32 observations
replace gender = 0 if gender == 2   // 0: male, 1: female

// marital status (married or living with a partner, yes/no)
ren Q38 married						    // new respondents
replace married = DQ38 if married == .  // repeat respondents

// age as categorial varialbe
rename _AGE_CAT age_cat
gen age_group = 1 if age_cat == "Under 40"
replace age_group = 2 if age_cat != "Under 40" & age_cat != "" 

// reported age as continous variable (only asked for new respondents)
ren Q32 age					
bysort userid: egen age_approx = max(age) // approximate age (across months indivudals are in survey)
replace age_approx = . if age_approx < 18 | age_approx > 99

// approximate age categories 
gen age_cat_approx = .
replace age_cat_approx = 1 if age_approx <= 35
replace age_cat_approx = 2 if age_approx > 35 & age_approx <= 45
replace age_cat_approx = 3 if age_approx > 45 & age_approx <= 55
replace age_cat_approx = 4 if age_approx > 55 

// education as cateogrial variable (college degree, yes/no)
rename _EDU_CAT edu_cat
gen college = 1 if edu_cat == "College"
replace college = 0 if edu_cat != "College"

// region 
gen region = 1 if _REGION_CAT == "Midwest"
replace region = 2 if _REGION_CAT == "Northeast"
replace region = 3 if _REGION_CAT == "South"
replace region = 4 if _REGION_CAT == "West"

// expected and current earnings (from Labor Market module)
rename oo2e2 exp_earning   // what do you believe your annual earnings will be in four months (in dollars)
rename l3 current_earning  // annual earnings at main job, before taxes, including bonuses, tips, overtime pay, commissions (in dollars)

// financial wealth (from HH Finance module)
ren d16new_1 finwealth
bysort userid: egen fwealth_all = max(finwealth) // financial wealth is only asked once per indvidual

// deflate earning variables and financial wealth
merge  m:1 year month using "$cpipath/cpi_2014.dta"
drop _merge 

replace exp_earning = exp_earning / cpi_2014
replace current_earning = current_earning / cpi_2014
replace finwealth = finwealth / cpi_2014
replace fwealth_all = fwealth_all / cpi_2014

// keep waves 2014-2019
keep if year >= 2014 & year <=2019

// log earnings variables
gen log_expearn = log(exp_earning)
gen log_currearn = log(current_earning)

// ihs transformation of financial wealth
ihstrans fwealth_all

// sample restrictions (singles and below 60 years old)
keep if married == 2
keep if age_cat != "Over 60"


**************************************************************
************ Regression on expected earnings (Table 6) ******
**************************************************************

reg log_expearn gender log_currearn i.college i.year i.region i.age_group [iw=weight], r  // Column (1)

reg log_expearn gender log_currearn ihs_fwealth_all  i.college i.year i.region i.age_group [iw=weight], r // Column (2)

reg log_expearn gender log_currearn i.college i.year i.region  if age_cat_approx < 3 [iw=weight], r // Column (3)

reg log_expearn gender log_currearn i.college i.year i.region  if age_cat_approx > 2 [iw=weight], r // Column (4)

				

	
		
		




